{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "9a538b6f-df3c-4d30-b89f-cafb5e2597e0",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import statsmodels.api as sm\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "24366129-a37a-4007-8d33-121d4385a932",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the topic-score table (presumably one row per story -- TODO confirm schema).\n",
    "df = pd.read_csv('/zfs/disinfo/dcweekly_contrast/story_text/dc_weekly_topic_scores.csv')\n",
    "\n",
    "# Binary indicator for the AI-generated text period: 0 when month is '2023-06',\n",
    "# 1 for every other month (missing months also map to 1).\n",
    "# Assumes 'month' holds 'YYYY-MM' strings -- verify against the CSV.\n",
    "# Vectorized comparison instead of a per-row lambda; values are unchanged.\n",
    "df['period'] = df['month'].ne('2023-06').astype('int64')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "becd539f-5d61-4c03-b6b4-13efe4963227",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                            OLS Regression Results                            \n",
      "==============================================================================\n",
      "Dep. Variable:                   guns   R-squared:                       0.374\n",
      "Model:                            OLS   Adj. R-squared:                  0.374\n",
      "Method:                 Least Squares   F-statistic:                     2017.\n",
      "Date:                Thu, 14 Mar 2024   Prob (F-statistic):               0.00\n",
      "Time:                        10:18:35   Log-Likelihood:                -470.34\n",
      "No. Observations:               10144   AIC:                             948.7\n",
      "Df Residuals:                   10140   BIC:                             977.6\n",
      "Df Model:                           3                                         \n",
      "Covariance Type:            nonrobust                                         \n",
      "==============================================================================\n",
      "                 coef    std err          t      P>|t|      [0.025      0.975]\n",
      "------------------------------------------------------------------------------\n",
      "period         0.1946      0.007     26.513      0.000       0.180       0.209\n",
      "Israel         0.2840      0.007     39.771      0.000       0.270       0.298\n",
      "Ukraine        0.3399      0.007     50.799      0.000       0.327       0.353\n",
      "intercept      0.1745      0.006     27.118      0.000       0.162       0.187\n",
      "==============================================================================\n",
      "Omnibus:                       41.450   Durbin-Watson:                   1.869\n",
      "Prob(Omnibus):                  0.000   Jarque-Bera (JB):               41.915\n",
      "Skew:                          -0.156   Prob(JB):                     7.91e-10\n",
      "Kurtosis:                       2.962   Cond. No.                         5.35\n",
      "==============================================================================\n",
      "\n",
      "Notes:\n",
      "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n"
     ]
    }
   ],
   "source": [
    "# OLS of the 'guns' column on 'period', 'Israel' and 'Ukraine'\n",
    "dependent_var = 'guns'\n",
    "independent_vars = ['period', 'Israel', 'Ukraine', 'intercept']\n",
    "\n",
    "# Column of ones so the fit estimates an intercept term\n",
    "df['intercept'] = 1\n",
    "\n",
    "# Estimate the coefficients\n",
    "model = sm.OLS(endog=df[dependent_var], exog=df[independent_vars]).fit()\n",
    "\n",
    "# Show the regression table\n",
    "print(model.summary())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "66de8bf3-e8ec-4ea4-b998-194a9c518335",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                            OLS Regression Results                            \n",
      "==============================================================================\n",
      "Dep. Variable:                  crime   R-squared:                       0.219\n",
      "Model:                            OLS   Adj. R-squared:                  0.219\n",
      "Method:                 Least Squares   F-statistic:                     947.0\n",
      "Date:                Thu, 14 Mar 2024   Prob (F-statistic):               0.00\n",
      "Time:                        10:19:07   Log-Likelihood:                 1349.7\n",
      "No. Observations:               10144   AIC:                            -2691.\n",
      "Df Residuals:                   10140   BIC:                            -2663.\n",
      "Df Model:                           3                                         \n",
      "Covariance Type:            nonrobust                                         \n",
      "==============================================================================\n",
      "                 coef    std err          t      P>|t|      [0.025      0.975]\n",
      "------------------------------------------------------------------------------\n",
      "period         0.1192      0.006     19.443      0.000       0.107       0.131\n",
      "Israel         0.2291      0.006     38.386      0.000       0.217       0.241\n",
      "Ukraine        0.0821      0.006     14.673      0.000       0.071       0.093\n",
      "intercept      0.5575      0.005    103.664      0.000       0.547       0.568\n",
      "==============================================================================\n",
      "Omnibus:                      860.671   Durbin-Watson:                   1.939\n",
      "Prob(Omnibus):                  0.000   Jarque-Bera (JB):             1092.155\n",
      "Skew:                          -0.787   Prob(JB):                    6.94e-238\n",
      "Kurtosis:                       3.326   Cond. No.                         5.35\n",
      "==============================================================================\n",
      "\n",
      "Notes:\n",
      "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n"
     ]
    }
   ],
   "source": [
    "# Same specification as the 'guns' regression, with 'crime' as the outcome\n",
    "dependent_var = 'crime'\n",
    "independent_vars = ['period', 'Israel', 'Ukraine', 'intercept']\n",
    "\n",
    "# Re-set the column of ones so this cell also runs on its own after df is loaded\n",
    "df['intercept'] = 1\n",
    "\n",
    "# Estimate the coefficients\n",
    "model = sm.OLS(endog=df[dependent_var], exog=df[independent_vars]).fit()\n",
    "\n",
    "# Show the regression table\n",
    "print(model.summary())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c65802ec-b6ad-47ec-97fa-7b1ed2a2023c",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "catching_trolls",
   "language": "python",
   "name": "catching_trolls"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.18"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
